# Run Robustness Check with Alternative Codebook
# Alexander F. Gazmararian
# agazmararian@gmail.com

library(tidyverse)
library(tidylog)
library(here)
library(jsonlite)
library(httr)
library(openai)
library(progress)

# Load all project functions
source(here("R", "load_functions.R"))
source(here("R", "annotation", "annotate.R"))

# Configuration for this robustness check ----

# Identifier for this run; it is embedded in the output file names and the
# cache directory written later in this script. Change it for each check.
ROBUSTNESS_CHECK_ID <- "codebook_robustness"
# Alternative codebook; set to NULL to use the default
CUSTOM_CODEBOOK_PATH <- here("R", "annotation", "codebook_alt.md")
# Alternative probe template; NULL skips the existence check below and is
# reported as "default"
CUSTOM_PROBE_TEMPLATE_PATH <- here("R", "annotation", "probe_template_alt.md")
# Passed as `reset` to annotate_statements()
RESET_ANNOTATION <- FALSE

# The ID becomes part of file and directory names, so insist on a usable value
if (!is.character(ROBUSTNESS_CHECK_ID) ||
  length(ROBUSTNESS_CHECK_ID) != 1L ||
  is.na(ROBUSTNESS_CHECK_ID) ||
  !nzchar(ROBUSTNESS_CHECK_ID)) {
  stop("ROBUSTNESS_CHECK_ID must be a single non-empty string", call. = FALSE)
}

# Validate that custom files exist if specified
if (!is.null(CUSTOM_CODEBOOK_PATH) && !file.exists(CUSTOM_CODEBOOK_PATH)) {
  stop("Custom codebook not found at: ", CUSTOM_CODEBOOK_PATH, call. = FALSE)
}
if (!is.null(CUSTOM_PROBE_TEMPLATE_PATH) &&
  !file.exists(CUSTOM_PROBE_TEMPLATE_PATH)) {
  stop(
    "Custom probe template not found at: ", CUSTOM_PROBE_TEMPLATE_PATH,
    call. = FALSE
  )
}

# Scalar choice, so plain if/else rather than the vectorised ifelse()
codebook_label <- if (is.null(CUSTOM_CODEBOOK_PATH)) {
  "default"
} else {
  CUSTOM_CODEBOOK_PATH
}
probe_template_label <- if (is.null(CUSTOM_PROBE_TEMPLATE_PATH)) {
  "default"
} else {
  CUSTOM_PROBE_TEMPLATE_PATH
}

message("=== ROBUSTNESS CHECK CONFIGURATION ===")
message(sprintf("Robustness check ID: %s", ROBUSTNESS_CHECK_ID))
message(sprintf("Custom codebook: %s", codebook_label))
message(sprintf("Custom probe template: %s", probe_template_label))
message(sprintf("Reset annotation: %s", RESET_ANNOTATION))
message("======================================")

# Note: no API key check happens here; it is deferred to annotate_statements()
# and is only needed when new API calls are required

# Read the processed statements that feed the annotation step
log_message("INFO", "Loading processed statements for robustness check")
processed_path <- here("data", "inter", "statements_processed.csv")
processed <- read_csv(processed_path, show_col_types = FALSE)

# Annotate with the codebook / probe template configured for this check
annotation_result <- annotate_statements(
  df_in = processed,
  codebook_path = CUSTOM_CODEBOOK_PATH,
  probe_template_path = CUSTOM_PROBE_TEMPLATE_PATH,
  robustness_check = ROBUSTNESS_CHECK_ID,
  reset = RESET_ANNOTATION,
  sleep_sec = 1
)

# Unpack the two pieces of the result used by the rest of the script
new_work_done <- annotation_result$new_work_done
annotated <- annotation_result$annotations

# Post-filter the credit flags: a flag is kept only when the statement text
# explicitly names the actor or law it credits.
#
# NOTE(review): the acronyms IRA, BIL and IIJA are anchored on word boundaries
# here. Unanchored and case-insensitive, "BIL" matches "billion"/"ability" and
# "IRA" matches "Iraq"/"miracle", so the filter would keep almost every flag.
# This script is meant to mirror the main script's post-filtering; confirm the
# main script uses the same patterns so the two runs stay comparable.

# Case-insensitive patterns that count as an explicit mention, per credit flag
credit_patterns <- list(
  dem = "\\bDemocrat(s)?\\b|\\bDemocratic Party\\b",
  gop = "\\bRepublican(s)?\\b|\\bRepublican Party\\b",
  ira = "\\bIRA\\b|Inflation Reduction Act",
  bil = paste(
    "Bipartisan Infrastructure Law",
    "Infrastructure Investment and Jobs Act",
    "\\bBIL\\b",
    "\\bIIJA\\b",
    sep = "|"
  ),
  # The lookbehind excludes "Vice President"; it needs perl = TRUE
  biden = "(?<!Vice )President\\b|Biden\\b|White House\\b"
)

# Set a credit flag to 0 where it equals 1 but `text` does not match `pattern`.
#
# flag:    vector of credit indicators (compared against 1)
# text:    statement text, same length as `flag`
# pattern: regular expression, matched case-insensitively
# perl:    passed to grepl(); TRUE is required for lookbehind patterns
#
# Returns `flag` with unsupported 1s replaced by 0; other values are unchanged.
require_explicit_mention <- function(flag, text, pattern, perl = FALSE) {
  mentioned <- grepl(pattern, text, ignore.case = TRUE, perl = perl)
  if_else(flag == 1 & !mentioned, 0L, flag)
}

annotated_post <- annotated %>%
  mutate(
    # Only allow credit to Democrats if the party is explicitly named
    credit_dem = require_explicit_mention(
      credit_dem, statement, credit_patterns$dem
    ),
    # Same for Republicans
    credit_gop = require_explicit_mention(
      credit_gop, statement, credit_patterns$gop
    ),
    # Only allow credit to IRA if the IRA is explicitly mentioned
    credit_ira = require_explicit_mention(
      credit_ira, statement, credit_patterns$ira
    ),
    # Same for the Bipartisan Infrastructure Law
    credit_bil = require_explicit_mention(
      credit_bil, statement, credit_patterns$bil
    ),
    # Only allow credit to Biden if the president is explicitly named
    credit_biden = require_explicit_mention(
      credit_biden, statement, credit_patterns$biden,
      perl = TRUE
    )
  ) %>%
  # Final processing: drop the raw text and expose the key as statement_id
  dplyr::select(-statement) %>%
  dplyr::rename(statement_id = id)

# Write the filtered annotations to data/inter, tagged with the check ID
output_name <- sprintf("annotated_statements_%s.csv", ROBUSTNESS_CHECK_ID)
inter_path <- here("data", "inter", output_name)
write_csv(annotated_post, inter_path)
message(sprintf("Saved robustness check results: %s", inter_path))

# Archive a dated copy only when this run produced new annotations
if (new_work_done) {
  message("New annotations were created - saving to cache/api for archival")

  cache_dir <- here(
    "data", "cache", "annotations", "robustness",
    ROBUSTNESS_CHECK_ID,
    format(Sys.Date(), "%Y%m%d")
  )
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }

  # The archived copy uses the same file name as the data/inter copy
  cache_path <- file.path(cache_dir, output_name)
  write_csv(annotated_post, cache_path)
  message(sprintf("Saved to cache: %s", cache_path))
} else {
  message("No new annotations were created - skipping cache/api save")
}

# Closing summary: which check ran and how many statements it produced
n_statements <- nrow(annotated_post)
message("=== ROBUSTNESS CHECK COMPLETE ===")
message(sprintf("Results saved with identifier: %s", ROBUSTNESS_CHECK_ID))
message(sprintf("Processed %d statements", n_statements))
message("==================================")
